home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
- #include "agrep.h"
-
- extern unsigned D_endpos, endposition, Init1, wildmask;
- extern Mask[], Bit[], Init[], NO_ERR_MASK;
- extern int AND, SIMPLEPATTERN, REGEX, NOUPPER, D_length;
- extern unsigned char Progname[];
-
- maskgen(Pattern, D)
- unsigned char *Pattern; int D;
- {
- struct term { int flag; unsigned char class[WORD];
- } position[WORD+10];
- unsigned char c;
-
- int i, j, k, l, M, OR=0, EVEN = 0, base, No_error;
-
-
- for(i=0; i<WORD; i++) position[i].class[0] = '\0';
- for(i=0; i<WORD; i++) position[i].flag = 0;
- wildmask = NO_ERR_MASK = endposition = 0;
- No_error = 0;
- M = strlen(Pattern);
- if(NOUPPER) {
- for(i=0; i<M; i++) if(isalpha(Pattern[i]))
- if (isupper(Pattern[i])) Pattern[i] = tolower(Pattern[i]);
- }
- #ifdef DEBUG
- for(i=0; i<M; i++) printf(" %d", Pattern[i]);
- printf("\n");
- #endif
- for (i=0, j=1; i< M; i++)
- {
- switch (Pattern[i])
- {
- case WILDCD : if(REGEX) {
- position[j].class[0] = '.';
- position[j].class[1] = '.';
- position[j++].class[2] = '\0';
- break;
- }
- wildmask = wildmask | Bit[j-1]; break;
- case STAR : break;
- case ORSYM : break;
- case LPARENT: break;
- case RPARENT: break;
- case LANGLE : No_error = ON; EVEN++;
- break;
- case RANGLE : No_error = OFF; EVEN--;
- if(EVEN < 0) {
- fprintf(stderr, "%s: illegal pattern, unmatched '<', '>'\n", Progname);
- exit(2);
- }
- break;
- case LRANGE : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
- i=i+1;
- if (Pattern[i] == NOTSYM) { position[j].flag = Compl; i++; }
- k=0;
- while (Pattern[i] != RRANGE && i < M)
- {
- if(Pattern[i] == HYPHEN)
- { position[j].class[k-1] = Pattern[i+1]; i=i+2; }
- else {
- position[j].class[k] = position[j].class[k+1] = Pattern[i];
- k = k+2; i++;
- }
- }
- if(i == M) {
- fprintf(stderr, "%s: illegal pattern, unmatched '[', ']'\n",Progname);
- exit(2);
- }
- position[j].class[k] = '\0';
- j++; break;
- case RRANGE : fprintf(stderr, "%s: illegal pattern, unmatched '[', ']'\n", Progname);
- exit(2);
- break;
- case ORPAT : if(REGEX == ON || AND == ON) {
- fprintf(stderr, "illegal pattern \n");
- exit(2);
- }
- OR = ON;
- position[j].flag = 2; position[j].class[0] = '\0';
- endposition = endposition | Bit[j++]; break;
- case ANDPAT : position[j].flag = 2; position[j].class[0] = '\0';
- if(j > D_length) AND = ON;
- if(OR || (REGEX == ON && j>D_length)) {
- fprintf(stderr, "illegal pattern \n");
- exit(2);
- }
- endposition = endposition | Bit[j++]; break;
- /*
- case ' ' : if (Pattern[i-1] == ORPAT || Pattern[i-1] == ANDPAT) break;
- if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
- position[j].flag = 0;
- position[j].class[0] = position[j].class[1] = Pattern[i];
- position[j++].class[2] = '\0'; break;
- */
- case '\n' : NO_ERR_MASK = NO_ERR_MASK | Bit[j];
- position[j].class[0] = position[j].class[1] = '\n';
- position[j++].class[2] = '\0';
- break;
- case WORDB : NO_ERR_MASK = NO_ERR_MASK | Bit[j];
- position[j].class[0] = 1;
- position[j].class[1] = 47;
- position[j].class[2] = 58;
- position[j].class[3] = 64;
- position[j].class[4] = 91;
- position[j].class[5] = 96;
- position[j].class[6] = 123;
- position[j].class[7] = 127;
- position[j++].class[8] = '\0';
- break;
- case NNLINE : NO_ERR_MASK |= Bit[j];
- position[j].class[0] = position[j].class[1] = '\n';
- position[j].class[2] = position[j].class[3] = NNLINE;
- position[j++].class[4] = '\0';
- break;
- default : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j];
- position[j].flag = 0;
- position[j].class[0] = position[j].class[1] = Pattern[i];
- position[j++].class[2] = '\0';
- }
- if(j > WORD) {
- fprintf(stderr, "%s: pattern too long\n", Progname);
- exit(2);
- }
- }
- if (EVEN != 0) {
- fprintf(stderr, "%s: illegal pattern, unmatched '<', '>'\n", Progname);
- exit(2);
- }
- M = j - 1;
- base = WORD - M;
- wildmask = (wildmask >> base);
- endposition = (endposition >> base);
- NO_ERR_MASK = (NO_ERR_MASK >> 1) & (~Bit[1]);
- NO_ERR_MASK = ~NO_ERR_MASK >> (base-1);
- for (i=1; i<= WORD - M ; i++) Init[0] = Init[0] | Bit[i];
- Init[0] = Init[0] | endposition;
- /* not necessary for INit[i], i>0, */
- /* but at every begining of the matching process append one
- no-match character to initialize the error vectors */
- endposition = ( endposition << 1 ) + 1;
- Init1 = (Init[0] | wildmask | endposition) ;
- D_endpos = ( endposition >> ( M - D_length ) ) << ( M - D_length);
- endposition = endposition ^ D_endpos;
- #ifdef DEBUG
- printf("endposition: %o\n", endposition);
- printf("no_err_mask: %o\n", NO_ERR_MASK);
- #endif
- for(c=0, i=0; i < MAXSYM; c++, i++)
- {
- for (k=1, l=0; k<=M ; k++, l=0) {
- while (position[k].class[l] != '\0') {
- if (position[k].class[l] == NOCARE && (c != '\n' || REGEX) )
- { Mask[c] = Mask[c] | Bit[base + k]; break; }
- if (c >= position[k].class[l] && c <= position[k].class[l+1])
- { Mask[c] = Mask[c] | Bit[base + k]; break; }
- l = l + 2; }
- if (position[k].flag == Compl) Mask[c] = Mask[c] ^ Bit[base+k];
- }
- }
- if(NOUPPER) for(c='A'; c<='Z'; c=c+1) if (isupper(c))
- Mask[c] = Mask[tolower(c)];
- return(M);
- }
-
-